Contents

0.1 libraries

source("functions_seurat.r")
library(pheatmap)
library(Seurat)
## Attaching SeuratObject
library(ggplot2)
library(ggplot2)
# Switch for the FindAllMarkers() calls guarded by `if(FINDALLMARKERS)` in
# this document. They are time consuming, so they are skipped unless this is
# set to TRUE (intended use: run once and save the results).
FINDALLMARKERS <- FALSE

0.2 SVZ

0.2.1 Run Seurat

# Load the SVZ count matrix and the matching cell metadata.
counts <- readRDS("counts_SVZ.rds")
meta.data <- readRDS("meta.data_SVZ.rds")

# RunSeurat() is not loaded from a package here; presumably it is defined in
# functions_seurat.r, which is sourced in the libraries section.
svzObj <- RunSeurat(
  counts = counts,
  meta.data = meta.data,
  nPCA = 35,
  resolution = 0.6
)

# PCA diagnostics over the same 35 components passed to RunSeurat().
ElbowPlot(svzObj, ndims = 35)

JackStrawPlot(svzObj, reduction = "pca", dims = 1:35)
## Warning: Removed 57741 rows containing missing values (`geom_point()`).

# Distribution of the percent.ribo and percent.mt metadata columns.
VlnPlot(svzObj, features = "percent.ribo")

VlnPlot(svzObj, features = "percent.mt")

0.2.2 doublet predictions from scrublet

# Cross-tabulate the scrublet doublet calls against the annotated clusters
# and draw the resulting count table as a heatmap.
pheatmap_w(
  table(svzObj$scrublet.predicted_doublets, svzObj$clusters_annotated),
  fontsize_number = 15
)

0.2.3 visualizing some markers

# Cluster overview on the default reduction, then explicitly on t-SNE.
DimPlot(svzObj, label = TRUE)

TSNEPlot(svzObj, label = TRUE)

# Remove the "umap" entry from the object's reductions. This change persists
# for every later use of svzObj.
# NOTE(review): presumably done so the plots below fall back to t-SNE; confirm.
svzObj@reductions$umap <- NULL

# Marker gene expression, two genes per figure.
FeaturePlot(svzObj, features = c("PTPRZ1", "BCAN"))

FeaturePlot(svzObj, features = c("SOX4", "SOX11"))

FeaturePlot(svzObj, features = c("FABP7", "EGFR"))

0.2.4 Cell cycle scores

# Plot the G2M.Score and S.Score columns; cols.fp is a colour vector that is
# defined outside this section.
FeaturePlot(svzObj, features = "G2M.Score", cols = cols.fp, pt.size = 1)

FeaturePlot(svzObj, features = "S.Score", cols = cols.fp, pt.size = 1)

0.2.5 number of cells per sample and cluster

# Cell counts per sample, first by the res.0.6 clustering column ...
x <- as.matrix(table(svzObj$sample, svzObj$res.0.6))
pheatmap_w(x)

# ... then by the annotated cluster labels.
x <- as.matrix(table(svzObj$sample, svzObj$clusters_annotated))
pheatmap_w(x)

0.2.6 selecting NSC-like cells

# Identity class to extract from the SVZ object.
idents <- "NSC-like"
sub1 <- subset(svzObj, idents = idents)
cells <- WhichCells(svzObj, idents = idents)

# Left panel: all clusters. Right panel: the selected cells highlighted.
fig1 <- TSNEPlot(svzObj, label = TRUE)
fig2 <- TSNEPlot(svzObj, cells.highlight = cells)
# Rename the highlight level "Group_1" to "Selected" in the plot data.
fig2$data$highlight <- plyr::mapvalues(
  fig2$data$highlight,
  from = "Group_1",
  to = "Selected"
)
print(fig1 + fig2)

# Marker detection is skipped unless FINDALLMARKERS is TRUE.
if (FINDALLMARKERS) {
  svzMk <- FindAllMarkers(svzObj)
}

0.3 Fetal (Couturier 2020)

# Load the fetal count matrix and metadata; every cell gets the same
# platform label.
counts <- readRDS("counts_Fetal.rds")
meta.data <- readRDS("meta.data_Fetal.rds")
meta.data$platform <- "seq10x"

fetalObj <- RunSeurat(
  counts = counts,
  meta.data = meta.data,
  nPCA = 30,
  resolution = 0.6
)

# PCA diagnostics over the same 30 components passed to RunSeurat().
ElbowPlot(fetalObj, ndims = 30)

JackStrawPlot(fetalObj, reduction = "pca", dims = 1:30)
## Warning: Removed 22650 rows containing missing values (`geom_point()`).

# Distribution of the percent.ribo and percent.mt metadata columns.
VlnPlot(fetalObj, features = "percent.ribo")

VlnPlot(fetalObj, features = "percent.mt")

DimPlot(fetalObj, label = TRUE)

# Cell-cycle score columns, coloured with cols.fp (defined outside this section).
FeaturePlot(fetalObj, features = "G2M.Score", cols = cols.fp, pt.size = 1)

FeaturePlot(fetalObj, features = "S.Score", cols = cols.fp, pt.size = 1)

# Marker detection is skipped unless FINDALLMARKERS is TRUE.
if (FINDALLMARKERS) {
  fetalMk <- FindAllMarkers(fetalObj)
}

0.3.1 selecting RG/progenitor clusters

# Identity classes to extract from the fetal object.
idents <- c("RG", "RG.Astro.Epend", "RG.Astro", "OPC")
sub2 <- subset(fetalObj, idents = idents)
cells <- WhichCells(fetalObj, idents = idents)

# Left panel: all clusters. Right panel: the selected cells highlighted.
fig1 <- DimPlot(fetalObj, label = TRUE)
fig2 <- DimPlot(fetalObj, cells.highlight = cells)
# Rename the highlight level "Group_1" to "Selected" in the plot data.
fig2$data$highlight <- plyr::mapvalues(
  fig2$data$highlight,
  from = "Group_1",
  to = "Selected"
)
print(fig1 + fig2)

0.4 Fetal Nowakowski 2017, cells and clusters information

# Per-cell sample sheet of the Nowakowski dataset; show the first rows.
df <- read.delim("sampleinfo_nowak.txt")
head(df)
##                  Cell WGCNAcluster    Name Age RegionName Laminae Area
## 1 Hi_GW21_1.Hi_GW21_1   nEN-early2 Sample1  19     Cortex     All    0
## 2 Hi_GW21_2.Hi_GW21_2   nEN-early2 Sample1  19     Cortex     All    0
## 3 Hi_GW21_3.Hi_GW21_3     nEN-late Sample1  19     Cortex     All    0
## 4 Hi_GW21_7.Hi_GW21_7      EN-V1-2 Sample1  19     Cortex     All    0
## 5 Hi_GW21_6.Hi_GW21_6      EN-V1-2 Sample1  19     Cortex     All    0
## 6 Hi_GW21_4.Hi_GW21_4         Glyc Sample1  19     Cortex     All    0
# Number of cells per Name value, as a one-column matrix.
# NOTE(review): x is neither printed nor plotted before it is reassigned later
# in this document; confirm whether a display step is missing here.
x <- as.matrix(table(df$Name))

0.4.1 selected clusters

# Cluster labels retained from the Nowakowski dataset.
selected_clusters <- c(
  "tRG", "RG-div1", "oRG", "vRG",
  "RG-div2", "OPC", "Astrocyte", "RG-early"
)

0.5 merging the 3 datasets

0.5.1 selected SVZ and Fetal clusters

# Load the counts and metadata of the merged subset.
counts <- readRDS("counts_sub123.rds")
meta.data <- readRDS("metadata_sub123.rds")

# Cells per platform and dataset.
table(meta.data$platform, meta.data$DataSet)
##         
##          Fetal_C Fetal_N SVZ
##   P1_10x     861       0 470
##   P2           0     634   0

# Cells per original annotation label.
table(meta.data$orig.annotation)
## 
##            C-OPC             C-RG       C-RG-Astro C-RG-Astro-Epend 
##               81              592              151               37 
##      N-Astrocyte            N-eRG            N-OPC            N-oRG 
##               46               50               37               62 
##        N-RG-div1        N-RG-div2            N-tRG            N-vRG 
##              122              120              100               97 
##            SVZ-5 
##              470

0.5.2 Run Seurat

# Same RunSeurat() helper as for the individual datasets, here with
# "platform" passed as vars.to.regress.
mergedObj <- RunSeurat(
  counts = counts,
  meta.data = meta.data,
  nPCA = 20,
  resolution = 0.8,
  vars.to.regress = "platform"
)

# PCA diagnostics; ElbowPlot() is called with its default number of dimensions.
ElbowPlot(mergedObj)

JackStrawPlot(mergedObj, reduction = "pca", dims = 1:20)
## Warning: Removed 44641 rows containing missing values (`geom_point()`).

0.5.3 visualization of original annotations

# Colour and label cells by the orig.annotation metadata column.
DimPlot(
  mergedObj,
  group.by = "orig.annotation",
  label = TRUE
)

0.5.4 visualization of Seurat clusters

# Colour and label cells by the object's active identities.
DimPlot(
  mergedObj,
  label = TRUE
)

0.5.5 visualizing some markers

# Marker gene expression on the merged object, two genes per figure.
FeaturePlot(mergedObj, features = c("EGFR", "OLIG1"))

FeaturePlot(mergedObj, features = c("CKB", "VIM"))

FeaturePlot(mergedObj, features = c("GFAP", "SOX9"))

0.5.6 add annotation

# Annotate SVZ cells by merged cluster membership.
# `clusters` maps each annotation label (the names) to a seurat_clusters id
# (the values).
clusters <- c(npNSC = 4, aRG1 = 5, aRG2 = 7, opNSC = 9)

# seq_along() instead of 1:length(): safe if the mapping is ever emptied.
for (i in seq_along(clusters)) {
  # Cells that are in the current cluster AND come from the SVZ dataset.
  cells <- WhichCells(
    mergedObj,
    expression = seurat_clusters == clusters[i] & DataSet == "SVZ"
  )
  # Only the selected rows are written; all other cells keep whatever value
  # (or NA) they have in the "annotation" column.
  mergedObj@meta.data[cells, "annotation"] <- names(clusters)[i]
}

0.5.7 visualization of SVZ cells annotations

# Colour and label cells by the "annotation" metadata column.
DimPlot(
  mergedObj,
  group.by = "annotation",
  label = TRUE
)

# Marker detection is skipped unless FINDALLMARKERS is TRUE.
if (FINDALLMARKERS) {
  mergedMk <- FindAllMarkers(mergedObj)
}

0.5.8 number of cells per annotated clusters

# Original annotation labels versus merged Seurat clusters.
x <- as.matrix(table(mergedObj$orig.annotation, mergedObj$seurat_clusters))
pheatmap_w(x)

# Samples versus merged Seurat clusters.
x <- as.matrix(table(mergedObj$sample, mergedObj$seurat_clusters))
pheatmap_w(x)

# Samples versus the "annotation" column.
x <- as.matrix(table(mergedObj$sample, mergedObj$annotation))
pheatmap_w(x)

0.5.9 cell cycle scores and plots

# Plot the G2M.Score and S.Score columns; cols.fp is a colour vector that is
# defined outside this section.
FeaturePlot(mergedObj, features = "G2M.Score", cols = cols.fp, pt.size = 1)

FeaturePlot(mergedObj, features = "S.Score", cols = cols.fp, pt.size = 1)

0.5.10 Ribosomal protein genes are high in eRG and aRG1, with low transcriptome size and similar percentage of mitochondrial genes, within each dataset

# percent.ribo: Fetal_N cells by original annotation, then the four annotated
# groups by annotation.
VlnPlot(
  subset(mergedObj, subset = DataSet == "Fetal_N"),
  features = "percent.ribo",
  group.by = "orig.annotation"
)

VlnPlot(
  subset(mergedObj, subset = annotation %in% c("aRG1", "aRG2", "opNSC", "npNSC")),
  features = "percent.ribo",
  group.by = "annotation"
)

# nFeature_RNA: same Fetal_N subset, then the object returned by subset_SVZ().
# NOTE(review): subset_SVZ() is defined outside this document; confirm that it
# selects the same cells as the `annotation %in% ...` filter used for the
# other two metrics.
VlnPlot(
  subset(mergedObj, subset = DataSet == "Fetal_N"),
  features = "nFeature_RNA",
  group.by = "orig.annotation"
)

VlnPlot(
  subset_SVZ(mergedObj),
  features = "nFeature_RNA",
  group.by = "annotation"
)

# percent.mt: same two subsets as for percent.ribo.
VlnPlot(
  subset(mergedObj, subset = DataSet == "Fetal_N"),
  features = "percent.mt",
  group.by = "orig.annotation"
)

VlnPlot(
  subset(mergedObj, subset = annotation %in% c("aRG1", "aRG2", "opNSC", "npNSC")),
  features = "percent.mt",
  group.by = "annotation"
)

0.6 Markers expression visualization on dotplot

# Read a gene list from file and print it. This value of `df` is only
# displayed: it is overwritten by the marker table below before any plot is
# drawn.
df <- scan("/aloes/pj/1p/43_SVZ_color/genes_dotplot_fig2", what = character())
df
##  [1] "VIM"      "HIST1H4C" "FABP7"    "CCND1"    "CCND2"    "CDK4"    
##  [7] "PCNA"     "ASCL1"    "SOX2"     "SOX4"     "SOX11"    "ELAVL4"  
## [13] "STMN2"    "DCX"      "EGFR"     "PTPRZ1"   "SPARCL1"  "FAM107A" 
## [19] "GFAP"     "NES"      "CLU"      "CRYAB"    "ALDOC"    "SOX9"    
## [25] "S100B"    "NCAN"     "MEG3"     "ID3"      "OLIG1"    "OLIG2"   
## [31] "GPR17"    "BCAS1"    "SGK1"     "PTGDS"    "SOX10"    "VCAN"    
## [37] "SIRT2"    "CD74"     "RGS1"     "TNR"

# Marker table with (at least) the columns gene, cluster and avg_log2FC.
df <- read.delim("/aloes/pj/1p/43_SVZ_color/nsc4_top10_4culumns.txt")

# Sort by decreasing avg_log2FC so that, for a gene listed more than once,
# duplicated() keeps the row with the highest fold change.
df <- df[order(df$avg_log2FC, decreasing = TRUE), ]
df <- df[!duplicated(df$gene), ]

# Group the rows by cluster. order() leaves ties in their existing order, so
# the genes stay sorted by decreasing avg_log2FC within each cluster and no
# second sort by avg_log2FC is needed.
df <- df[order(df$cluster), ]

# From here on `df` is the vector of gene names, reversed for plotting.
df <- rev(df$gene)
length(df)
## [1] 33

# Dot plot over the SVZ object, grouped by its active identities.
DotPlot(svzObj, features = df) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 10))

ss <- subset_SVZ(mergedObj)

# Same genes on the subset of the merged object, grouped by annotation.
DotPlot(ss, features = df, group.by = "annotation") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 10))
## Warning: Scaling data with a low number of groups may produce misleading results